# Replication Archive for: Aggarwal, Minali, Jennifer Allen, Alex Coppock, 
# Dan Frankowski, Sol Messing, Kelly Zhang, James Barnes, Andrew Beasley, 
# Harry Hantman, and Sylvan Zheng: The impact of digital campaign advertising 
# during the 2020 US presidential election: evidence from survey experiments, 
# field experiments, and a campaign-level experiment. 
# Nature Human Behaviour, forthcoming.

rm(list = ls())
library(tidyverse)
library(estimatr)
# library(rdrobust)
# library(patchwork)
source('helper_file.R')

# Read the aggregated analysis file; each row carries a count column `n`.
aggregated_set <- read_rds("aggregated_analysis_set.rds")

# Expand to row-level data (uncount() repeats each row `n` times) and store
# `condition` as a factor whose first level is "control", so that model
# coefficients are labelled for "treat".
main_analysis_set <-
  aggregated_set %>%
  uncount(weights = n) %>%
  mutate(condition = factor(condition, levels = c("control", "treat")))


# linearized estimator ----------------------------------------------

# Three specifications of the condition-by-tss_100 interaction model. Each
# has the same three 2020 turnout outcomes on the left-hand side (bound
# together with cbind()) and differs only in its covariates:
#   1: no covariates
#   2: num_times_voted and pts_100
#   3: pts_100, the individual voted_in_<year> indicators, strata, and the
#      party indicators
formula_int_1 <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_100") 
formula_int_2 <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_100 + num_times_voted + pts_100")
formula_int_3 <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_100 + pts_100 + voted_in_2000 + voted_in_2002 + voted_in_2004 + voted_in_2006 + voted_in_2008 + voted_in_2010 + voted_in_2012 + voted_in_2014 + voted_in_2016 + voted_in_2018 + strata + party_dem + party_rep + party_unknown")

# Fit each specification on the full row-level data, weighting by `ipw`,
# and convert the fit directly into a tidy coefficient table.
fit_int_1 <- tidy(
  lm_robust(formula_int_1, data = main_analysis_set, weights = ipw)
)

fit_int_2 <- tidy(
  lm_robust(formula_int_2, data = main_analysis_set, weights = ipw)
)

fit_int_3 <- tidy(
  lm_robust(formula_int_3, data = main_analysis_set, weights = ipw)
)


# d-i-c by bucket ---------------------------------------------------

# Difference-in-CATEs specifications: condition interacted with tss_bucket,
# with the same three 2020 turnout outcomes as the linear models.
#   1: no covariates
#   2: num_times_voted, tss_100 and pts_100
#   3: pts_100, tss_100, the individual voted_in_<year> indicators, strata,
#      and the party indicators
formula_1_dic <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_bucket") 
formula_2_dic <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_bucket + num_times_voted + tss_100 + pts_100")
formula_3_dic <- formula("cbind(voted_in_2020, voted_in_person_2020, voted_early_2020) ~ condition*tss_bucket + pts_100 + tss_100 + voted_in_2000 + voted_in_2002 + voted_in_2004 + voted_in_2006 + voted_in_2008 + voted_in_2010 + voted_in_2012 + voted_in_2014 + voted_in_2016 + voted_in_2018 + strata + party_dem + party_rep + party_unknown")

# All three models use the same two-bucket subset, so filter the row-level
# data once and reuse it instead of re-filtering for every fit.
# NOTE(review): the interaction coefficient extracted later
# ("conditiontreat:tss_bucket60 to 70") is the 60-70 versus 30-40 contrast
# only if "30 to 40" acts as the reference level within this subset --
# confirm how tss_bucket is coded.
dic_buckets <- c("30 to 40", "60 to 70")
dic_analysis_set <- filter(main_analysis_set, tss_bucket %in% dic_buckets)

difference_in_cates_1 <-
  lm_robust(
    formula = formula_1_dic,
    weights = ipw,
    data = dic_analysis_set
  ) %>%
  tidy()

difference_in_cates_2 <-
  lm_robust(
    formula = formula_2_dic,
    weights = ipw,
    data = dic_analysis_set
  ) %>%
  tidy()

difference_in_cates_3 <-
  lm_robust(
    formula = formula_3_dic,
    weights = ipw,
    data = dic_analysis_set
  ) %>%
  tidy()


# Stack the three linear-model coefficient tables, tagging each with its
# adjustment set in `estimator`, and keep only the treatment-by-tss_100
# interaction term.
gg_df_1 <-
  list(
    `Unadjusted` = fit_int_1,
    `PAP adjustment set` = fit_int_2,
    `Full adjustment set` = fit_int_3
  ) |>
  bind_rows(.id = "estimator") |>
  filter(term == "conditiontreat:tss_100")

# Same stacking for the bucket models, keeping the treatment-by-bucket
# interaction term for the "60 to 70" bucket.
gg_df_2 <-
  list(
    `Unadjusted` = difference_in_cates_1,
    `PAP adjustment set` = difference_in_cates_2,
    `Full adjustment set` = difference_in_cates_3
  ) |>
  bind_rows(.id = "estimator") |>
  filter(term == "conditiontreat:tss_bucket60 to 70")

# Combine both sets of estimates; `inquiry` records which one a row belongs
# to (the labels contain a line break for use as facet strips).
gg_df <-
  list(
    `Treatment*TSS interaction term\nfrom linear model` = gg_df_1,
    `Difference-in-CATEs\nTSS 60-70 versus TSS 30-40` = gg_df_2
  ) |>
  bind_rows(.id = "inquiry") |>
  mutate(
    # fix the display order of the adjustment sets
    estimator = factor(
      estimator,
      levels = c("Unadjusted", "PAP adjustment set", "Full adjustment set")
    ),
    # relabel the outcomes with readable names, in this level order
    outcome = factor(
      outcome,
      levels = c("voted_early_2020", "voted_in_person_2020", "voted_in_2020"),
      labels = c("Voted early in 2020", "Voted in person in 2020", "Voted in 2020")
    ),
    # text label built from estimate and standard error by the helper
    # sourced from helper_file.R
    entry = make_se_entry(estimate, std.error, digits = 3)
  )

# Figure: point estimates with confidence intervals for each outcome, with
# one row of panels per inquiry and one column per adjustment set.
g <- 
  ggplot(gg_df, aes(estimate, outcome)) +
  geom_point() +
  # estimate/SE label, shifted 0.004 away from zero horizontally and nudged
  # upward so it does not sit on top of the point
  geom_text(
    aes(label = entry, x = estimate + sign(estimate) * 0.004),
    size = 2,
    nudge_y = 0.25,
    color = gray(0.45)
  ) +
  # horizontal confidence interval from conf.low to conf.high
  geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
  # reference line at zero
  geom_vline(xintercept = 0,
             color = "red",
             linetype = "dotted",
             alpha = 0.5) +
  facet_grid(inquiry~estimator) +
  coord_cartesian(xlim = c(-0.05, 0.05)) +
  scale_x_continuous(breaks = c(-0.02, 0.02)) +
  # theme_minimal() is a complete theme; the theme_bw() call that used to
  # precede it was fully replaced by it and has been dropped as dead code
  theme_minimal() +
  theme(axis.title.y = element_blank(),
        panel.grid.minor.x = element_blank()) +
  xlab("Treatment effect heterogeneity estimates by Trump support")

g

#ggsave("output/figure_2.pdf", g, width = 6.5, height = 4.5)



# Table_F2 --------------------------------------------------

# Tabulate the estimates shown in the figure. The line break inside each
# inquiry label is replaced by a space, and rows are sorted by inquiry,
# outcome and estimator.
#
# xtable() is namespace-qualified because this file does not attach the
# xtable package itself (helper_file.R may do so -- not visible from here).
#
# `digits` has one entry per selected column plus a leading entry for the
# row names: 0 for row names, the three label columns and df; 3 for
# estimate, std.error, statistic, p.value, conf.low and conf.high.
table_f2 <-
  gg_df |>
  select(
    inquiry,
    outcome,
    estimator,
    estimate,
    std.error,
    df,
    statistic,
    p.value,
    conf.low,
    conf.high
  ) |>
  mutate(inquiry = str_replace(
    string = inquiry,
    pattern = "\\n",
    replacement = " "
  )) |>
  arrange(inquiry, outcome, estimator) |>
  xtable::xtable(digits = c(0, 0, 0, 0, 3, 3, 0, 3, 3, 3, 3))

# print.xtable(
#   table_f2,
#   include.colnames = FALSE,
#   include.rownames = FALSE,
#   only.contents = TRUE,
#   hline.after = c(),
#   file = "output/table_F2.tex"
# )

